其他
ggplot2作图5招:《R入门25个菜谱》中所有图片的ggplot2版本
ggplot2出第二版时,我被Hadley Wickham致谢了,并且他给我寄来了签名书。很多人想问我怎么学画图,我只有一个秘籍,那就是勤加练习。我时常问自己,这个图我能不能画出来?若干年前,我翻阅了一本简单的入门书,把里面所有的图片都自己画了一遍,也就是本文了。虽然这些图都比较简单,但作为入门练习正好:简单的图画多了,才有可能画复杂的图;复杂的图画多了,才有可能有所突破。
比如ggplot2中aes映射是不会作用于axis text的,theme也不能用aes映射,我就写了个函数,可以用选定变量给axis text上色并自动加入图例:
道理你都懂,学习无非是从简单入手,刻意练习,大量刻意的练习,走学术这条路,画图是躲不过的,不要再欺骗自己说Prism够用,ggplot2必须要搞起来,小白可以从点鼠标开始,请参考:
最后代码搞起来,而最好的学习资料莫过于作者的书,当然中文资料相对滞后。而进阶装逼,当然要持续关注本公众号。
1.16 Creating a Scatter Plot
# Attach ggplot2 before its first use; without this the ggplot() call below
# fails with "could not find function".
library(ggplot2)

# Base graphics: plotting the two-column `cars` data frame gives a scatter plot.
plot(cars)

# ggplot2 equivalent: stopping distance against speed, one point per row.
ggplot(cars, aes(x = speed, y = dist)) +
  geom_point()
1.17 Creating a Bar Chart
# Mean temperature per month as a named vector, for the base-graphics version.
heights <- tapply(airquality$Temp, airquality$Month, mean)

# Base graphics: two bar charts side by side (plain, then annotated).
par(mfrow = c(1, 2))
barplot(heights)
barplot(heights,
        main = "Mean Temp. by Month",
        names.arg = c("May", "Jun", "Jul", "Aug", "Sep"),
        ylab = "Temp (deg. F)")

# gridExtra provides grid.arrange() for laying out several ggplot objects.
library(gridExtra)

# ggplot2 needs a data frame: one row per month with its mean temperature.
# aggregate() is base R, so no extra package is needed for the summary.
heights <- aggregate(Temp ~ Month, data = airquality, FUN = mean)
heights$Month <- as.character(heights$Month)

# Plain version: bar height comes from the `weight` aesthetic.
p1 <- ggplot(heights, aes(x = Month, weight = Temp)) +
  geom_bar()

# Annotated version: month numbers relabelled with abbreviations, plus a
# title and a y-axis label.
p2 <- ggplot(heights,
             aes(x = factor(Month,
                            labels = c("May", "Jun", "Jul", "Aug", "Sep")),
                 weight = Temp)) +
  geom_bar() +
  ggtitle("Mean Temp. By Month") +
  xlab("") +
  ylab("Temp (deg. F)")

grid.arrange(p1, p2, ncol = 2)
1.18 Creating a Box Plot
# 100 standard-normal draws plus two fixed values (-5 and 5) at the extremes.
y <- c(-5, rnorm(100), 5)

# Base graphics box plot.
boxplot(y)

# ggplot2 equivalent: a box plot needs an x aesthetic, so a single-level
# dummy factor is supplied; both axis labels are blanked.
ggplot() +
  geom_boxplot(aes(x = factor(1), y = y)) +
  xlab("") +
  ylab("")
1.19 Creating a Histogram
# Load the Cars93 data set from MASS without attaching the package.
data(Cars93, package = "MASS")

# Base graphics: default histogram next to one with breaks = 20.
par(mfrow = c(1, 2))
hist(Cars93[["MPG.city"]])
hist(Cars93[["MPG.city"]], breaks = 20)

# ggplot2 equivalent: bin widths are the data range divided by 5 and by 20.
mpg_span <- diff(range(Cars93[["MPG.city"]]))
p <- ggplot(Cars93, aes(x = MPG.city))
p1 <- p + geom_histogram(binwidth = mpg_span / 5)
p2 <- p + geom_histogram(binwidth = mpg_span / 20)
grid.arrange(p1, p2, ncol = 2)
1.23 Diagnosing a Linear Regression
# Fit a simple linear regression of sepal length on sepal width.
data(iris)
m <- lm(Sepal.Length ~ Sepal.Width, data = iris)

# Base graphics: plot() on the fitted model, drawn onto a 2 x 2 panel grid.
par(mfrow = c(2, 2))
plot(m)

# Residuals and fitted values of the model, kept for the ggplot2 versions.
r <- resid(m)
yh <- predict(m)
# Build a ggplot2 scatter plot of `y` against `x`.
#
# Args:
#   x, y:  vectors of equal length; combined into a data frame for plotting.
#   title: plot title (default empty).
#   xlab:  x-axis label (default empty).
#   ylab:  y-axis label (default empty).
#
# Returns:
#   A ggplot object; further layers can be added with `+`.
scatterplot <- function(x, y,
                        title = "",
                        xlab = "",
                        ylab = "") {
  points_df <- data.frame(x = x, y = y)
  ggplot(points_df, aes(x = x, y = y)) +
    geom_point() +
    labs(title = title, x = xlab, y = ylab)
}
# Panel 1: residuals against fitted values, with a horizontal reference line
# at zero and a smoother added on top of the points.
p1 <- scatterplot(
  yh, r,
  title = "Residuals vs Fitted",
  xlab = "Fitted values",
  ylab = "Residuals"
) +
  geom_hline(yintercept = 0) +
  geom_smooth()

# Residual standard error: square root of deviance over residual degrees of
# freedom.
s <- sqrt(deviance(m) / df.residual(m))
# Residuals scaled by the residual standard error `s`.
# NOTE(review): this scaling does not adjust for leverage, unlike
# stats::rstandard(m); confirm whether that simplification is intended.
rs <- r / s

# Build a ggplot2 quantile-quantile plot of `y` against a reference
# distribution.
#
# Args:
#   y:            numeric vector of sample values; NAs are dropped by sort().
#   distribution: quantile function of the reference distribution
#                 (default qnorm).
#   title:        plot title.
#   xlab:         x-axis label.
#   ylab:         y-axis label.
#
# Returns:
#   A ggplot object with the sorted sample plotted against the theoretical
#   quantiles, plus the line y = x for reference.
qqplot <- function(y,
                   distribution = qnorm,
                   title = "Normal Q-Q",
                   xlab = "Theoretical Quantiles",
                   ylab = "Sample Quantiles") {
  # Sort first: sort() drops NAs, so the point count must be taken afterwards
  # to keep both columns the same length.
  sample_q <- sort(y)
  # Pass the count explicitly; ppoints() would read a length-1 `y` as a count.
  theory_q <- distribution(ppoints(length(sample_q)))
  d <- data.frame(x = theory_q, y = sample_q)
  ggplot2::ggplot(d, ggplot2::aes(x = x, y = y)) +
    ggplot2::geom_point() +
    ggplot2::geom_line(ggplot2::aes(x = x, y = x)) +
    ggplot2::labs(title = title, x = xlab, y = ylab)
}
# Panel 2: normal Q-Q plot of the scaled residuals.
p2 <- qqplot(rs, ylab = "Standardized residuals")

# Panel 3: scale-location plot, sqrt(|scaled residual|) against fitted values.
sqrt.rs <- sqrt(abs(rs))
p3 <- scatterplot(yh, sqrt.rs,
                  title = "Scale-Location",
                  xlab = "Fitted values",
                  ylab = expression(sqrt("Standardized residuals")))
p3 <- p3 + geom_smooth()

# Panel 4: scaled residuals against leverage (diagonal of the hat matrix).
hii <- lm.influence(m, do.coef = FALSE)$hat
p4 <- scatterplot(hii, rs)
p4 <- p4 +
  geom_hline(yintercept = 0) +
  geom_smooth() +
  # annotate() draws the label exactly once at a fixed size. geom_text() with
  # constant aesthetics would repeat it for every row of the plot data and
  # map `size` through a scale instead of setting it.
  annotate("text",
           x = min(hii) + diff(range(hii)) * 0.3,
           y = min(rs) + diff(range(rs)) * 0.04,
           label = "-- Cook's distance",
           size = 3)

# Arrange the four diagnostic panels on a 2-column grid.
grid.arrange(p1, p2, p3, p4, ncol = 2)